Dimensionality reductions of selected tissues from The Cancer Genome Atlas (TCGA) and the Genotype-Tissue Expression Consortium (GTEx), using a gene subset from the Cancer Gene Census, which is collated by the Catalogue of Somatic Mutations in Cancer (COSMIC).
# Set autoreload module for dev
# Mode 2 makes IPython reload modules before running each cell, so edits to
# rnaseq_lib are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
%aimport rnaseq_lib
# Python 2: make `/` perform true division.
from __future__ import division
import rnaseq_lib as r
import numpy as np
import pandas as pd
import holoviews as hv
# Use Bokeh as the HoloViews plotting backend for this notebook.
hv.extension('bokeh')
## Synapse ID: syn11515015
# Tab-separated table; the first column becomes the row index.
# Column dtypes are supplied by rnaseq_lib (r.tissues.dtype) — presumably a
# column-to-dtype mapping; confirm against the library.
df_path = '/mnt/rnaseq-cancer/Objects/tcga-gtex-metadata-expression.tsv'
df = pd.read_csv(
    df_path,
    sep='\t',
    index_col=0,
    dtype=r.tissues.dtype
)
# Holoview plotting wrapper around the dataframe; used by the plotting cells below
h = r.plot.Holoview(df)
# Load cosmic data and get genes
# The COSMIC table is tab-separated and its first column is used as the index.
cosmic_path = '/mnt/rnaseq-cancer/Metadata/cosmic_all_1-26-2018.tsv'
cosmic = pd.read_csv(cosmic_path, sep='\t', index_col=0)
# Keep only the COSMIC index entries that are also columns of the expression
# dataframe, preserving the order in which they appear in the COSMIC table.
cosmic_genes = [x for x in cosmic.index.tolist() if x in df.columns]
# Parenthesised single-argument form: prints the same text under Python 2 and
# also parses under Python 3 (the bare print statement does not).
print('Number of filtered cosmic genes: {}'.format(len(cosmic_genes)))
%%time
title = ' of TCGA and GTEx Subset by {} Cosmic Genes'.format(len(cosmic_genes))
color_indices = ['tissue', 'labels', 'type']
tsne = h.tsne(genes=cosmic_genes, title='Cosmic')
%%opts Overlay [tabs=True] Scatter [width=1000 height=800]
hv.Overlay([tsne.opts(dict(Scatter=dict(plot=dict(color_index=x)))).relabel('{}:'.format(x)) for x in color_indices])
%%time
title = ' of TCGA and GTEx Subset by {} Cosmic Genes'.format(len(cosmic_genes))
trimap = h.trimap(genes=cosmic_genes, title='Cosmic')
%%opts Overlay [tabs=True] Scatter [width=1000 height=800]
hv.Overlay([trimap.opts(dict(Scatter=dict(plot=dict(color_index=x)))).relabel('{}:'.format(x)) for x in color_indices])